{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true,
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "# 使用pytorch 自动求导\n",
    "\n",
    "    2.5.1 自动求导要点\n",
    "    2.5.2计算图\n",
    "    2.5.3 标量反向传播\n",
    "    2.5.4 非标量反向传播"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "tensor([[-0.6043, -0.1153,  0.3664, -0.4803],\n",
      "        [-0.8925, -0.4327, -0.4175,  0.6769],\n",
      "        [ 0.3795,  1.0685, -1.3675,  1.3260]], requires_grad=True)\n",
      "tensor([[ 1.0355, -1.0343, -0.3214,  0.2042],\n",
      "        [ 1.3049,  0.4144,  0.7120,  1.8672],\n",
      "        [ 1.6937,  0.6042,  1.2528, -0.0939]], requires_grad=True)\n",
      "tensor(7.1465, grad_fn=<SumBackward0>)\n",
      "tensor([[1., 1., 1., 1.],\n",
      "        [1., 1., 1., 1.],\n",
      "        [1., 1., 1., 1.]])\n",
      "True True True\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "\n",
    "x = torch.randn(3,4,requires_grad=True)\n",
    "print(x)\n",
    "\n",
    "# 使用 requires_grad 参数\n",
    "# x.requires_grad = True\n",
    "\n",
    "y  = torch.randn(3,4 ,requires_grad=True)\n",
    "print(y)\n",
    "\n",
    "t = x + y\n",
    "z = t.sum()\n",
    "print(z)\n",
    "\n",
    "z.backward()\n",
    "print(y.grad)\n",
    "\n",
    "print(x.requires_grad, y.requires_grad, z.requires_grad)\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "False True True True\n",
      "True True True False False\n"
     ]
    }
   ],
   "source": [
    "# 举个例子  z = w * x +b\n",
    "x = torch.randn(1)\n",
    "b = torch.randn(1, requires_grad=True)\n",
    "w = torch.randn(1, requires_grad=True)\n",
    "y = w * x\n",
    "z = y + b\n",
    "# 查看tensor是否需要求导，计算梯度\n",
    "print(x.requires_grad, b.requires_grad, w.requires_grad, y.requires_grad)\n",
    "# 查看 是否是叶子结点\n",
    "print(x.is_leaf, b.is_leaf, w.is_leaf, y.is_leaf,z.is_leaf)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "w.grad: tensor([-2.1708])\n",
      "b.grad: tensor([3.])\n"
     ]
    }
   ],
   "source": [
    "# 反向传播计算\n",
    "z.backward(retain_graph = True)\n",
    "print('w.grad:', w.grad)\n",
    "print('b.grad:', b.grad) # 梯度会累加\n",
    "\n",
    "\n",
    "## 2.5 Tensor与Autograd\n",
    "> 参数学习离不开求导，Pytorch是如何进行求导的呢？\n",
    "\n",
    "autograd包为张量上所有的操作提供了自动求导功能，\n",
    "而torch.Tensor和torch.Function为autograd上的两个核心类，他们相互连接并生成一个有向非循环图\n",
    "\n",
    "### 2.5.1 自动求导要点\n",
    "\n",
    "autograd包为对tensor进行自动求导，需考虑如下事项：\n",
    "\n",
    "（1）创建叶子节点(leaf node)的tensor，使用requires_grad参数指定是否记录对其的操作，以便之后利用backward()方法进行梯度求解。requires_grad参数缺省值为False，如果要对其求导需设置为True，与之有依赖关系的节点自动变为True。\n",
    "\n",
    "（2）可利用requires_grad_()方法修改tensor的requires_grad属性。可以调用.detach()或with torch.no_grad():将不再计算张量的梯度，跟踪张量的历史记录。这点在评估模型、测试模型阶段常常使用。\n",
    "\n",
    "（3）通过运算创建的tensor（即非叶子节点），会自动被赋于grad_fn属性。该属性表示梯度函数。叶子节点的grad_fn为None。\n",
    "\n",
    "（4）最后得到的tensor执行backward()函数，此时自动计算各变在量的梯度，并将累加结果保存grad属性中。计算完成后，非叶子节点的梯度自动释放。\n",
    "\n",
    "（5）backward()函数接受参数，该参数应和调用backward()函数的Tensor的维度相同，或者是可broadcast的维度。如果求导的tensor为标量（即一个数字），backward中参数可省略。\n",
    "\n",
    "（6）反向传播的中间缓存会被清空，如果需要进行多次反向传播，需要指定backward中的参数retain_graph=True。多次反向传播时，梯度是累加的。\n",
    "\n",
    "（7）非叶子节点的梯度backward调用后即被清空。\n",
    "\n",
    "（8）可以通过用torch.no_grad()包裹代码块来阻止autograd去跟踪那些标记为.requesgrad=True的张量的历史记录。\n",
    "\n",
    "> 整个过程中，Pytorch采用计算图的形式进行组织，该计算图为动态图，它的计算图在每次前向传播时，将重新构建\n",
    "\n",
    "### 2.5.2计算图\n",
    "\n",
    "计算图是一种有向无环图像，用图形方式表示算子与变量之间的关系\n",
    "\n",
    "### 2.5.3 标量反向传播\n",
    "\n",
    "假设x、w、b都是标量，z=wx+b，对标量z调用backward()"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import torch\n",
    "\n",
    "## (1)定义叶子节点及算子节点\n",
    "# 定义输入张量x\n",
    "x=torch.Tensor([2])\n",
    "#初始化权重参数W,偏移量b、并设置require_grad属性为True，为自动求导t\n",
    "w=torch.randn(1,requires_grad=True)\n",
    "b=torch.randn(1,requires_grad=True)\n",
    "#实现前向传播\n",
    "y=torch.mul(w,x)  #等价于w*x\n",
    "z=torch.add(y,b)  #等价于y+b\n",
    "#查看x,w，b 叶子节点的requite_grad属性: False,True,True\n",
    "print(\"x,w,b的require_grad属性分别为：{},{},{}\".format(x.requires_grad,w.requires_grad,b.requires_grad))\n",
    "\n",
    "# （2）查看叶子节点、非叶子节点的其他属性\n",
    "\n",
    "#查看非叶子节点的requres_grad属性,\n",
    "print(\"y，z的requires_grad属性分别为：{},{}\".format(y.requires_grad,z.requires_grad))\n",
    "#因与w，b有依赖关系，故y，z的requires_grad属性也是：True,True\n",
    "#查看各节点是否为叶子节点\n",
    "print(\"x，w，b，y，z的是否为叶子节点：{},{},{},{},{}\".format(x.is_leaf,w.is_leaf,b.is_leaf,y.is_leaf,z.is_leaf))\n",
    "#x，w，b，y，z的是否为叶子节点：True,True,True,False,False\n",
    "#查看叶子节点的grad_fn属性\n",
    "print(\"x，w，b的grad_fn属性：{},{},{}\".format(x.grad_fn,w.grad_fn,b.grad_fn))\n",
    "#因x，w，b为用户创建的，为通过其他张量计算得到，故x，w，b的grad_fn属性：None,None,None\n",
    "#查看非叶子节点的grad_fn属性\n",
    "print(\"y，z的是否为叶子节点：{},{}\".format(y.grad_fn,z.grad_fn))\n",
    "#y，z的是否为叶子节点：,\n",
    "\n",
    "# （3）自动求导，实现梯度方向传播，即梯度的反向传播。\n",
    "\n",
    "# 基于z张量进行梯度反向传播,执行backward之后计算图会自动清空，\n",
    "z.backward()\n",
    "#如果需要多次使用backward，需要修改参数retain_graph为True，此时梯度是累加的\n",
    "#z.backward(retain_graph=True)\n",
    "\n",
    "#查看叶子节点的梯度，x是叶子节点但它无需求导，故其梯度为None\n",
    "print(\"参数w,b的梯度分别为:{},{},{}\".format(w.grad,b.grad,x.grad))\n",
    "#参数w,b的梯度分别为:tensor([2.]),tensor([1.]),None\n",
    "\n",
    "#非叶子节点的梯度，执行backward之后，会自动清空\n",
    "print(\"非叶子节点y,z的梯度分别为:{},{}\".format(y.grad,z.grad))\n",
    "#非叶子节点y,z的梯度分别为:None,None\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "### 2.5.4 非标量反向传播\n",
    "\n",
    "Pytorch有个简单的规定，不让张量(tensor)对张量求导，只允许标量对张量求导\n",
    "\n",
    "如果目标张量对一个非标量调用backward()，需要传入一个gradient参数,该参数也是张量，而且需要与调用backward()的张量形状相同。\n",
    "\n",
    "为什么要传入一个张量gradient？\n",
    "传入这个参数就是为了把张量对张量求导转换为标量对张量求导。\n",
    "\n",
    "假设目标值为loss=(y_1,y_2,…,y_m)传入的参数为v=(v_1,v_2,…,v_m),那么就可把对loss的求导，转换为对loss*v^T标量的求导。即把原来∂loss/∂x得到雅可比矩阵(Jacobian)乘以张量v^T，便可得到我们需要的梯度矩阵。\n",
    "\n",
    "backward函数的格式为： `backward(gradient=None, retain_graph=None, create_graph=False)`\n"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    " （1）定义叶子叶子节点及计算节点"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import torch\n",
    "\n",
    "#定义叶子节点张量x，形状为1x2\n",
    "x= torch.tensor([[2, 3]], dtype=torch.float, requires_grad=True)\n",
    "#初始化Jacobian矩阵\n",
    "J= torch.zeros(2 ,2)\n",
    "#初始化目标张量，形状为1x2\n",
    "y = torch.zeros(1, 2)\n",
    "#定义y与x之间的映射关系：\n",
    "#y1=x1**2+3*x2，y2=x2**2+2*x1\n",
    "y[0, 0] = x[0, 0] ** 2 + 3 * x[0 ,1]\n",
    "y[0, 1] = x[0, 1] ** 2 + 2 * x[0, 0]"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "（2）手工计算y对x的梯度\n",
    "\n",
    "![](./cal_acobian.png)"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "（2）调用backward获取y对x的梯度\n"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# y.backward(torch.Tensor([[1, 1]]))\n",
    "# print(x.grad)\n",
    "# 结果为tensor([[6., 9.]])\n",
    "# 错在v的取值错误，通过这种方式得的到并不是y对x的梯度。这里我们可以分成两步的计算。首先让v=(1,0)得到y_1对x的梯度，然后使v=(0,1)，得到y_2对x的梯度\n",
    "\n",
    "#生成y1对x的梯度\n",
    "y.backward(torch.Tensor([[1, 0]]),retain_graph=True)\n",
    "J[0]=x.grad\n",
    "#梯度是累加的，故需要对x的梯度清零\n",
    "x.grad = torch.zeros_like(x.grad)\n",
    "#生成y2对x的梯度\n",
    "y.backward(torch.Tensor([[0, 1]]))\n",
    "J[1]=x.grad\n",
    "print(J)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 2.6 使用Numpy实现机器学习\n",
    "\n",
    "1. 给出一个数组x，然后基于表达式：y=3x^2+2，加上一些噪音数据到达另一组数据y。\n",
    "2. 构建一个机器学习模型，学习表达式y=wx^2+b的两个参数w，b。利用数组x，y的数据为训练数据\n",
    "3. 采用梯度梯度下降法，通过多次迭代，学习到w、b的值"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# -*- coding: utf-8 -*-\n",
    "import numpy as np\n",
    "%matplotlib inline\n",
    "from matplotlib import pyplot as plt\n",
    "# 生成输入数据x及目标数据y\n",
    "np.random.seed(100)\n",
    "x = np.linspace(-1, 1, 100).reshape(100,1)\n",
    "y = 3*np.power(x, 2) +2+ 0.2*np.random.rand(x.size).reshape(100,1)\n",
    "\n",
    "# 画图\n",
    "plt.scatter(x, y)\n",
    "plt.show()"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "训练模型 - 定义损失函数，假设批量大小为100：\n",
    "\n",
    "![](ch02_loss.png)"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# 随机初始化参数\n",
    "w1 = np.random.rand(1,1)\n",
    "b1 = np.random.rand(1,1)\n",
    "\n",
    "# 训练模型\n",
    "lr =0.001 # 学习率\n",
    "\n",
    "for i in range(800):\n",
    "    # 前向传播\n",
    "    y_pred = np.power(x,2)*w1 + b1\n",
    "    # 定义损失函数\n",
    "    loss = 0.5 * (y_pred - y) ** 2\n",
    "    loss = loss.sum()\n",
    "    #计算梯度\n",
    "    grad_w=np.sum((y_pred - y)*np.power(x,2))\n",
    "    grad_b=np.sum((y_pred - y))\n",
    "    #使用梯度下降法，是loss最小\n",
    "    w1 -= lr * grad_w\n",
    "    b1 -= lr * grad_b\n",
    "\n",
    "plt.plot(x, y_pred,'r-',label='predict')\n",
    "plt.scatter(x, y,color='blue',marker='o',label='true') # true data\n",
    "plt.xlim(-1,1)\n",
    "plt.ylim(2,6)\n",
    "plt.legend()\n",
    "plt.show()\n",
    "print(w1,b1)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 2.7 使用Tensor及antograd实现机器学习"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "import torch\n",
    "# 准备数据\n",
    "torch.manual_seed(100)\n",
    "#生成x坐标数据，x为tenor，需要把x的形状转换为100x1\n",
    "x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1)\n",
    "#生成y坐标数据，y为tenor，形状为100x1，另加上一些噪音\n",
    "y = 3*x.pow(2) +2+ 0.2*torch.rand(x.size())\n",
    "\n",
    "# 画图，把tensor数据转换为numpy数据\n",
    "plt.scatter(x.numpy(), y.numpy())\n",
    "plt.show()\n",
    "\n",
    "\n",
    "# 初始化权重参数\n",
    "\n",
    "# 随机初始化参数，参数w，b为需要学习的，故需requires_grad=True\n",
    "w = torch.randn(1,1, dtype=torch.float,requires_grad=True)\n",
    "b = torch.zeros(1,1, dtype=torch.float, requires_grad=True)\n",
    "\n",
    "# 训练模型\n",
    "lr =0.001 # 学习率\n",
    "for ii in range(800):\n",
    "    # 前向传播，并定义损失函数loss\n",
    "    y_pred = x.pow(2).mm(w) + b\n",
    "    loss = 0.5 * (y_pred - y) ** 2\n",
    "    loss = loss.sum()\n",
    "\n",
    "    # 自动计算梯度，梯度存放在grad属性中\n",
    "    loss.backward()\n",
    "\n",
    "    # 手动更新参数，需要用torch.no_grad()，使上下文环境中切断自动求导的计算\n",
    "    with torch.no_grad():\n",
    "        w -= lr * w.grad\n",
    "        b -= lr * b.grad\n",
    "\n",
    "    # 梯度清零\n",
    "        w.grad.zero_()\n",
    "        b.grad.zero_()\n",
    "# 可视化训练结果\n",
    "plt.plot(x.numpy(), y_pred.detach().numpy(),'r-',label='predict')#predict\n",
    "plt.scatter(x.numpy(), y.numpy(),color='blue',marker='o',label='true') # true data\n",
    "plt.xlim(-1,1)\n",
    "plt.ylim(2,6)\n",
    "plt.legend()\n",
    "plt.show()\n",
    "\n",
    "print(w, b)\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}